This analysis examines Arctic Council speeches and national Arctic strategy documents by comparing the relative frequencies of words belonging to twelve different topics. We have defined these topics by creating dictionaries of commonly mentioned words that relate to each.
The method used here calculates the fraction of words in each document belonging to each topic to calculate a document-level score. It then averages document-level scores for each country to create a country-level score.
This section loads the libraries and the text files from four different folders. It also contains some excess code. Moving forward, we use the data frames produced by the readtext function, not the corpora, DTMs, or DFMs.
This version does not remove stop words, punctuation, etc. This should not adversely affect our results, since we use a dictionary-based method to calculate our scores.
library(igraph)
library(tidyverse)
library(tidytext)
library(readtext)
library(quanteda)
library(dplyr)
library(stringr)
library(plotly)
library(rworldmap)
library(ngram)
# Load the four document collections. For each folder, DATA_DIR is pointed at
# the folder, readtext() reads every file matched by the "/*" glob into a data
# frame (columns doc_id and text), and the doc_id values are copied into the
# row names.
# Note: these doc ids are less regular than those of the UN Corpus.

# 1. National strategy documents
DATA_DIR <- "C:/Users/laura/OneDrive/Desktop/Krogh-Arctic/Strategy Documents (new)"
strategy <- readtext(file.path(DATA_DIR, "*"))
rownames(strategy) <- strategy[["doc_id"]]

# 2. Observer documents
DATA_DIR <- "C:/Users/laura/OneDrive/Desktop/Krogh-Arctic/Observer Documents"
observer <- readtext(file.path(DATA_DIR, "*"))
rownames(observer) <- observer[["doc_id"]]

# 3. Arctic speeches
DATA_DIR <- "C:/Users/laura/OneDrive/Desktop/Krogh-Arctic/Arctic Speeches"
speeches <- readtext(file.path(DATA_DIR, "*"))
rownames(speeches) <- speeches[["doc_id"]]

# 4. Official speeches and documents
DATA_DIR <- "C:/Users/laura/OneDrive/Desktop/Krogh-Arctic/Official Speeches and Documents"
officialspeeches <- readtext(file.path(DATA_DIR, "*"))
rownames(officialspeeches) <- officialspeeches[["doc_id"]]
# Tokenise each collection: the `text` column is replaced by a `word` column
# holding one token per row.
strategydf <- strategy %>% unnest_tokens(word, text)
observerdf <- observer %>% unnest_tokens(word, text)
speechesdf <- speeches %>% unnest_tokens(word, text)
officialspeechesdf <- officialspeeches %>% unnest_tokens(word, text)

# Combine the four full-text tables by chaining full joins left to right.
# The join keys are the defaults reported in the recorded messages below.
fulltexts <- Reduce(full_join, list(strategy, observer, speeches, officialspeeches))
## Joining, by = c("doc_id", "text")
## Joining, by = c("doc_id", "text")
## Joining, by = c("doc_id", "text")

# Combine the four token tables the same way.
totaldf <- Reduce(
  full_join,
  list(strategydf, observerdf, speechesdf, officialspeechesdf)
)
## Joining, by = c("doc_id", "word")
## Joining, by = c("doc_id", "word")
## Joining, by = c("doc_id", "word")

# Attach to every token row the number of token rows in its document.
words <- ungroup(mutate(group_by(totaldf, doc_id), length = n()))
# Split a copy of doc_id at the first underscore into `country` (the part
# before it) and `misc` (everything after it).
# extra = "merge" keeps any further underscore-separated pieces inside `misc`.
# Without it, separate() discarded them and warned ("Expected 2 pieces.
# Additional pieces discarded") for 20086 token rows and 13 documents.
# `country` is unaffected by this change.
words <- words %>%
  mutate(doc_id2 = doc_id) %>%
  separate(doc_id2, c("country", "misc"), sep = "_", extra = "merge")

# The same split for the full-text table.
fulltexts <- fulltexts %>%
  mutate(doc_id2 = doc_id) %>%
  separate(doc_id2, c("country", "misc"), sep = "_", extra = "merge")
There are twelve topics: 1. environment 2. indigenous 3. transport 4. development 5. tourism 6. resources 7. fisheries 8. diplomacy 9. security 10. russia 11. china 12. legal
# Topic dictionaries. Each is a two-column data frame:
#   topic - one dictionary term per row
#   name  - the topic label, repeated on every row
# Columns are built as character vectors (stringsAsFactors = FALSE). On
# R < 4.0 data.frame() would otherwise create a factor column, which is what
# triggered the "joining character vector and factor" warnings whenever a
# dictionary was joined to the token table.
environment <- data.frame(
  topic = c(
    "research", "science", "scienti", "environment", "climate",
    "climate change", "ocean", "sea", "sea level", "atmosphere", "air", "ice",
    "warm", "melt", "knowledge", "station", "glaciological", "geological",
    "biological", "ecosystem", "paleoclimate", "laboratory", "conservation",
    "preservation", "temperature", "data", "measurement", "study",
    "precipitation", "pollution", "cryospheric", "publication",
    "biodiversity", "academic", "glacier", "disaster", "observe", "trend",
    "predict", "species", "force", "global warming", "protect"
  ),
  name = "environment",
  stringsAsFactors = FALSE
)

indigenous <- data.frame(
  topic = c(
    "nation", "local", "indigenous", "peoples", "community", "human",
    "social", "lives", "condition", "inhabitants", "well-being", "language",
    "health", "traditional", "culture", "rural", "residents"
  ),
  name = "indigenous",
  stringsAsFactors = FALSE
)

transport <- data.frame(
  topic = c(
    "transportation", "shipping", "import", "export", "maritime", "transport",
    "ship", "vessel", "navigation", "route", "channel", "northeast passage",
    "northwest passage", "northern sea route", "voyage", "commercial",
    "trade", "icebreakers", "water", "transit"
  ),
  name = "transport",
  stringsAsFactors = FALSE
)

development <- data.frame(
  topic = c(
    "Sustainable", "development", "economic", "globalization",
    "economic zones", "commercial", "production", "strategy", "benefit",
    "capital", "market", "enterprise", "opportunity", "business",
    "infrastructure", "fund", "industry"
  ),
  name = "development",
  stringsAsFactors = FALSE
)

tourism <- data.frame(
  topic = c("tourism", "tourists", "rescue", "ecotourism"),
  name = "tourism",
  stringsAsFactors = FALSE
)

resources <- data.frame(
  topic = c(
    "oil", "industr", "resource", "technology", "energy", "gas", "carbon",
    "infrastructure", "build", "exploit", "mine", "utilization",
    "exploitation", "natural", "mineral", "geothermal", "wind", "exploration",
    "consumer", "pipeline", "extraction", "seabed", "metal", "metals",
    "seafloor", "offshore"
  ),
  name = "resources",
  stringsAsFactors = FALSE
)

fisheries <- data.frame(
  topic = c("fish", "fisheries", "fishing", "aquaculture", "goods"),
  name = "fisheries",
  stringsAsFactors = FALSE
)

diplomacy <- data.frame(
  topic = c(
    "strengthen", "joint", "relationship", "peace", "integration",
    "cooperation", "international", "relations", "diplomatic", "contribute",
    "parties", "stability", "equality", "participants", "connect",
    "multilateral", "bilateral", "regional", "global", "coalition",
    "collaboration", "coordination", "share", "same", "affairs", "harmony",
    "alliance", "partnership", "freedom", "political", "meet"
  ),
  name = "diplomacy",
  stringsAsFactors = FALSE
)

security <- data.frame(
  topic = c(
    "sovereignty", "state", "nation", "secure", "security", "stakeholder",
    "governance", "claim", "interests", "territory", "zone", "own",
    "influence", "military", "defend", "defense", "position", "independent"
  ),
  name = "security",
  stringsAsFactors = FALSE
)

russia <- data.frame(
  topic = c("russia"),
  name = "russia",
  stringsAsFactors = FALSE
)

china <- data.frame(
  topic = c("china"),
  name = "china",
  stringsAsFactors = FALSE
)

legal <- data.frame(
  topic = c(
    "continental shelf", "rule", "UNCLOS", "jurisdiction", "rights",
    "spitsbergen", "legal", "law", "just", "treaty", "treaties", "regulation",
    "boundary", "boundaries", "delimitation", "EEZ", "arbitration", "court",
    "ICJ", "ruling", "protocol", "reservation", "convention",
    "contiguous zone", "right", "ratification", "ratified"
  ),
  name = "legal",
  stringsAsFactors = FALSE
)
# Score one topic dictionary against the tokenised corpus.
#
# topic: a data frame with a `topic` column (dictionary terms) and a `name`
#        column (topic label; only the first value is used).
# Reads the global `words` table (one row per token, with doc_id, country,
# word and the per-document token count in `length`).
# Returns one row per country: country, country_score, name.
#
# Document score = matching tokens / tokens in the document.
# Country score  = plain mean of its documents' scores.
#
# Fixes relative to the previous version:
# * Scores are summarised to ONE row per document before averaging. The old
#   code kept one row per matched token, so each document's score entered the
#   country mean once per hit (a hit-weighted mean, not a mean over documents).
# * Documents with no matching token now contribute a score of 0 instead of
#   being dropped by the inner join, which biased country means upwards and
#   removed countries with no hits at all.
# * Dictionary terms are lower-cased before matching. The terms "Sustainable",
#   "UNCLOS", "EEZ" and "ICJ" contain capitals, so they rely on this to match
#   tokens, which unnest_tokens() lower-cases by default.
# Multi-word terms (e.g. "climate change") still cannot match single-word
# tokens; countwords2() handles those.
countwords <- function(topic) {
  # as.character() guards against factor columns (data.frame() on R < 4.0).
  topic_words <- unique(tolower(as.character(topic$topic)))
  topic_name <- as.character(topic$name[1])

  # One row per document. `country` is constant within a document, so adding
  # it to the grouping only carries it through.
  docscores <- words %>%
    group_by(doc_id, country) %>%
    summarize(
      count = sum(word %in% topic_words),
      doclength = mean(length),
      score = count / doclength
    ) %>%
    ungroup()

  # Average document scores within each country and label with the topic.
  docscores %>%
    group_by(country) %>%
    summarize(country_score = mean(score)) %>%
    mutate(name = topic_name)
}
# Method 1: score every topic with countwords() and draw one interactive dot
# plot per topic, countries ordered by score.
#
# Recorded output from the original knit:
#   - every countwords() call emitted
#       Warning: Column `word`/`topic` joining character vector and factor,
#       coercing into character vector
#   - the first call also emitted
#       Warning: package 'bindrcpp' was built under R version 3.4.4
#   - every ggplotly() call emitted
#       We recommend that you use the dev version of ggplot2 with `ggplotly()`
#       Install it with: `devtools::install_github('hadley/ggplot2')`

# Environment
environment_scores <- countwords(environment)
p.environment <- environment_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Environment")
ggplotly(p.environment)

# Indigenous
indigenous_scores <- countwords(indigenous)
p.indigenous <- indigenous_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Indigenous")
ggplotly(p.indigenous)

# Transport
transport_scores <- countwords(transport)
p.transport <- transport_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "transport")
ggplotly(p.transport)

# Development
development_scores <- countwords(development)
p.development <- development_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Development")
ggplotly(p.development)

# Tourism
tourism_scores <- countwords(tourism)
p.tourism <- tourism_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Tourism")
ggplotly(p.tourism)

# Resources
resources_scores <- countwords(resources)
p.resources <- resources_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Resources")
ggplotly(p.resources)

# Fisheries
fisheries_scores <- countwords(fisheries)
p.fisheries <- fisheries_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Fisheries")
ggplotly(p.fisheries)

# Diplomacy
diplomacy_scores <- countwords(diplomacy)
p.diplomacy <- diplomacy_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Diplomacy")
ggplotly(p.diplomacy)

# Security
security_scores <- countwords(security)
p.security <- security_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Security")
ggplotly(p.security)

# Russia
russia_scores <- countwords(russia)
p.russia <- russia_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Russia")
ggplotly(p.russia)

# China
china_scores <- countwords(china)
p.china <- china_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "China")
ggplotly(p.china)

# Legal
legal_scores <- countwords(legal)
p.legal <- legal_scores %>%
  ggplot(aes(x = reorder(country, country_score), y = country_score)) +
  geom_point() +
  coord_flip() +
  labs(title = "Legal")
ggplotly(p.legal)
## New Score Method
# Score one topic dictionary against the untokenised full texts, so that
# multi-word terms ("northern sea route") and stems ("scienti") can match.
#
# topic: a data frame with a `topic` column (dictionary terms, used as regular
#        expressions) and a `name` column (topic label; first value is used).
# Reads the global `fulltexts` table (one row per document, with `text` and
# `country` columns).
# Returns one row per country: country, country_score, name.
#
# Document score = pattern matches in the text / wordcount() of the text.
# Country score  = mean of its documents' scores.
#
# Fixes relative to the previous version:
# * Only the `topic` column is used as search patterns. The old
#   apply(topic, 1, ...) passed each whole row, so the topic label in `name`
#   was counted as a pattern too, once per dictionary row, inflating every
#   count by nrow(topic) times the occurrences of the label in the text.
# * Matching ignores case. The texts are not lower-cased here, so lower-case
#   terms used to miss capitalised occurrences and "Sustainable" only matched
#   when capitalised. This also aligns the method with countwords(), which
#   works on lower-cased tokens.
# * vapply(..., USE.NAMES = FALSE) replaces sapply(), which labelled each
#   result with the complete document text as its name.
# Matching remains substring-based, as before (e.g. "ice" also matches inside
# "service").
countwords2 <- function(topic) {
  patterns <- unique(as.character(topic$topic))
  topic_name <- as.character(topic$name[1])

  # Total number of matches of any dictionary pattern in one text.
  count_matches <- function(text) {
    sum(str_count(text, regex(patterns, ignore_case = TRUE)))
  }

  docscores <- fulltexts
  docscores$count <- vapply(
    fulltexts$text, count_matches, numeric(1), USE.NAMES = FALSE
  )
  docscores$doclength <- vapply(
    fulltexts$text, wordcount, numeric(1), USE.NAMES = FALSE
  )
  docscores <- docscores %>%
    mutate(score = count / doclength)

  # Average document scores within each country and label with the topic.
  docscores %>%
    group_by(country) %>%
    summarize(country_score = mean(score)) %>%
    mutate(name = topic_name)
}
#countgraph <- function(topic){
#counts1 <- countwords(topic)
#counts2 <- countwords2(topic)
#name <- paste0(topic$name[1],"_scoresall")
#countsall <- full_join(counts1, counts2, by="country")
#assign(as.character(name), countsall)
#return(get(name))
#p.2 <-ggplot(countsall) + geom_point(aes(x=reorder(country,country_score.y), y=country_score.y)) + coord_flip() +
# ggtitle(topic)
#print(p.2)
#}
#countgraph(transport)
# Method 2: score every topic with countwords2(), join the result to the
# method-1 scores by country, and plot the method-2 score (country_score.y;
# country_score.x is the method-1 score).
#
# Fixes relative to the previous version:
# * Tourism displayed p.transport2 and Diplomacy displayed p.diplomacy (the
#   method-1 plot); each section now shows the plot it has just built.
# * The indigenous and transport joins were stored in `environment_all`,
#   overwriting the environment table; they now use indigenous_all and
#   transport_all, so environment_all keeps the environment comparison.
#
# Recorded output from the original knit: every ggplotly() call emitted
#   We recommend that you use the dev version of ggplot2 with `ggplotly()`
#   Install it with: `devtools::install_github('hadley/ggplot2')`

# Environment
environment_scores2 <- countwords2(environment)
environment_all <- full_join(environment_scores, environment_scores2, by = "country")
p.environment2 <- ggplot(environment_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Environment")
ggplotly(p.environment2)

# Indigenous
indigenous_scores2 <- countwords2(indigenous)
indigenous_all <- full_join(indigenous_scores, indigenous_scores2, by = "country")
p.indigenous2 <- ggplot(indigenous_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Indigenous")
ggplotly(p.indigenous2)

# Transport
transport_scores2 <- countwords2(transport)
transport_all <- full_join(transport_scores, transport_scores2, by = "country")
p.transport2 <- ggplot(transport_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Transport")
ggplotly(p.transport2)

# Development
development_scores2 <- countwords2(development)
development_all <- full_join(development_scores, development_scores2, by = "country")
p.development2 <- ggplot(development_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Development")
ggplotly(p.development2)

# Tourism
tourism_scores2 <- countwords2(tourism)
tourism_all <- full_join(tourism_scores, tourism_scores2, by = "country")
p.tourism2 <- ggplot(tourism_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Tourism")
ggplotly(p.tourism2)

# Resources
resources_scores2 <- countwords2(resources)
resources_all <- full_join(resources_scores, resources_scores2, by = "country")
p.resources2 <- ggplot(resources_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Resources")
ggplotly(p.resources2)

# Fisheries
fisheries_scores2 <- countwords2(fisheries)
fisheries_all <- full_join(fisheries_scores, fisheries_scores2, by = "country")
p.fisheries2 <- ggplot(fisheries_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Fisheries")
ggplotly(p.fisheries2)

# Diplomacy
diplomacy_scores2 <- countwords2(diplomacy)
diplomacy_all <- full_join(diplomacy_scores, diplomacy_scores2, by = "country")
p.diplomacy2 <- ggplot(diplomacy_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Diplomacy")
ggplotly(p.diplomacy2)

# Security
security_scores2 <- countwords2(security)
security_all <- full_join(security_scores, security_scores2, by = "country")
p.security2 <- ggplot(security_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Security")
ggplotly(p.security2)

# Legal
legal_scores2 <- countwords2(legal)
legal_all <- full_join(legal_scores, legal_scores2, by = "country")
p.legal2 <- ggplot(legal_all) +
  geom_point(aes(x = reorder(country, country_score.y), y = country_score.y)) +
  coord_flip() +
  ggtitle("Legal")
ggplotly(p.legal2)
# Draw a world map coloured by country_score for one table of country scores.
#
# topic: a data frame with `country` and `country_score` columns; countries
#        are matched to the map by name (joinCode = "NAME").
# The map title is the text of the argument expression as written by the
# caller, e.g. mapscores(legal_scores) is titled "legal_scores".
# Returns whatever mapCountryData() returns.
mapscores <- function(topic) {
  map_title <- deparse(substitute(topic))
  joined_map <- joinCountryData2Map(
    topic,
    joinCode = "NAME",
    nameJoinColumn = "country"
  )
  mapCountryData(
    joined_map,
    nameColumnToPlot = "country_score",
    colourPalette = "heat",
    addLegend = TRUE,
    borderCol = "grey",
    mapTitle = map_title
  )
}
# Draw one world map per topic from the method-1 country scores.
# Each call names its score table directly because mapscores() uses the
# argument expression as the map title (via deparse(substitute())).
# The "##" lines are the recorded console output of each call: how many
# countries in the score table were matched to the map.
mapscores(environment_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
mapscores(indigenous_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
mapscores(transport_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data
mapscores(development_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
mapscores(tourism_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data
mapscores(resources_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
mapscores(fisheries_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data
mapscores(diplomacy_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
mapscores(security_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data
mapscores(russia_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data
mapscores(china_scores)
## 13 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 230 codes from the map weren't represented in your data
mapscores(legal_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data
# Positive-sentiment score per country, using the "bing" lexicon.
# Document score = tokens found among the lexicon's positive words / tokens
# in the document. Country score = mean of its documents' scores.
#
# Fix: the previous version kept one row per positive token when averaging,
# so each document's score was counted once per positive word (a hit-weighted
# mean). Scores are now reduced to one row per document first, and documents
# with no positive word contribute 0 instead of being dropped.
bing_lexicon <- get_sentiments("bing")
positive_words <- unique(bing_lexicon$word[bing_lexicon$sentiment == "positive"])

sentimentscores <- words %>%
  group_by(doc_id, country) %>%
  summarize(score = sum(word %in% positive_words) / mean(length)) %>%
  ungroup() %>%
  group_by(country) %>%
  summarize(country_score = mean(score))

# Map the country scores; countries are matched to the map by name.
joinCountryData2Map(sentimentscores, joinCode = "NAME", nameJoinColumn = "country") %>%
  mapCountryData(
    nameColumnToPlot = "country_score", colourPalette = "heat",
    addLegend = TRUE, borderCol = "grey", mapTitle = "positive sentiment"
  )
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
# Fear-sentiment score per country, using the "nrc" lexicon.
# Document score = tokens found among the lexicon's "fear" words / tokens in
# the document. Country score = mean of its documents' scores.
#
# Fix: the previous version kept one row per fear token when averaging, so
# each document's score was counted once per fear word (a hit-weighted mean).
# Scores are now reduced to one row per document first, and documents with no
# fear word contribute 0 instead of being dropped.
nrc_lexicon <- get_sentiments("nrc")
fear_words <- unique(nrc_lexicon$word[nrc_lexicon$sentiment == "fear"])

fearsentimentscores <- words %>%
  group_by(doc_id, country) %>%
  summarize(score = sum(word %in% fear_words) / mean(length)) %>%
  ungroup() %>%
  group_by(country) %>%
  summarize(country_score = mean(score))

# Map the country scores; countries are matched to the map by name.
joinCountryData2Map(fearsentimentscores, joinCode = "NAME", nameJoinColumn = "country") %>%
  mapCountryData(
    nameColumnToPlot = "country_score", colourPalette = "heat",
    addLegend = TRUE, borderCol = "grey", mapTitle = "fear sentiment"
  )
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data
# Stack the twelve method-1 score tables into one long table with columns
# country, country_score and name (one row per country-topic pair).
#
# The previous version chained full_join() with no `by`, which joined on all
# three columns (including the floating-point score) and printed a
# "Joining, by = ..." message per join. Since every table carries a different
# `name`, no rows ever matched and the result was simply the rows of all
# tables in order. bind_rows() states that intent directly.
countrydf <- bind_rows(
  environment_scores,
  indigenous_scores,
  transport_scores,
  development_scores,
  tourism_scores,
  resources_scores,
  fisheries_scores,
  diplomacy_scores,
  security_scores,
  russia_scores,
  china_scores,
  legal_scores
)
# Bar chart of one country's method-1 topic scores.
#
# country.name: value to match against the `country` column of the global
#               `countrydf`; also used as the x-axis title.
# Returns the ggplot object (one bar per topic, topic labels rotated 90
# degrees).
countrygraph <- function(country.name) {
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))

  countrydf %>%
    filter(country == country.name) %>%
    ggplot(aes(x = name, y = country_score)) +
    geom_bar(stat = "identity") +
    rotated_labels +
    scale_x_discrete(name = country.name)
}
# Draw the method-1 topic profile for every country, in this order.
# print() is needed because plots are not auto-printed inside a loop.
method1_countries <- c(
  "Canada", "China", "Denmark", "Finland", "France", "Germany", "Greenland",
  "Iceland", "India", "Italy", "Japan", "Korea", "Netherlands", "Norway",
  "Poland", "Russia", "Singapore", "Spain", "Sweden", "Switzerland", "UK",
  "US"
)
for (method1_country in method1_countries) {
  print(countrygraph(method1_country))
}
# Stack the ten method-2 score tables into one long table with columns
# country, country_score and name (one row per country-topic pair).
# There are no method-2 tables for the russia and china dictionaries.
#
# The previous version chained full_join() with no `by`, which joined on all
# three columns (including the floating-point score) and printed a
# "Joining, by = ..." message per join. Since every table carries a different
# `name`, no rows ever matched and the result was simply the rows of all
# tables in order. bind_rows() states that intent directly.
country2df <- bind_rows(
  environment_scores2,
  indigenous_scores2,
  transport_scores2,
  development_scores2,
  tourism_scores2,
  resources_scores2,
  fisheries_scores2,
  diplomacy_scores2,
  security_scores2,
  legal_scores2
)
# Bar chart of one country's method-2 topic scores.
#
# country.name: value to match against the `country` column of the global
#               `country2df`. The x-axis title is the country name followed
#               by "2" to tell these charts apart from the method-1 ones.
# Returns the ggplot object (one bar per topic, topic labels rotated 90
# degrees).
countrygraph2 <- function(country.name) {
  axis_title <- paste0(country.name, 2)
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))

  country2df %>%
    filter(country == country.name) %>%
    ggplot(aes(x = name, y = country_score)) +
    geom_bar(stat = "identity") +
    rotated_labels +
    scale_x_discrete(name = axis_title)
}
# Draw the method-2 topic profile for every country, in this order.
# print() is needed because plots are not auto-printed inside a loop.
method2_countries <- c(
  "Canada", "China", "Denmark", "Finland", "France", "Germany", "Greenland",
  "Iceland", "India", "Italy", "Japan", "Korea", "Netherlands", "Norway",
  "Poland", "Russia", "Singapore", "Spain", "Sweden", "Switzerland", "UK",
  "US"
)
for (method2_country in method2_countries) {
  print(countrygraph2(method2_country))
}
# Side-by-side comparison of the US, Russia and China on the method-2 scores,
# leaving out the tourism, environment and fisheries topics.
countrymutli <- filter(
  country2df,
  country == "US" | country == "Russia" | country == "China",
  name != "tourism",
  name != "environment",
  name != "fisheries"
)

# Grouped bars: one cluster per topic, one bar per country.
p.countrymulti <- countrymutli %>%
  ggplot(aes(x = name, y = country_score, fill = country)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Interactive version.
ggplotly(p.countrymulti)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Rank the method-1 scores and export them as a country-by-topic table.

# Standardise all scores (z-score over every country-topic pair).
scoremean <- mean(countrydf$country_score)
scoresd <- sd(countrydf$country_score)
countrydf <- countrydf %>%
  mutate(sdmean = (country_score - scoremean) / scoresd)

# Quartile cut-points of the standardised scores.
scoresq <- quantile(countrydf$sdmean, probs = c(0, 0.25, 0.5, 0.75, 1))

# Label each score by quartile:
#   below Q1 "very low", [Q1, median) "low", [median, Q3) "medium",
#   Q3 and above "high".
# Fix: the previous conditions used strict ">" and "<" on both sides, so a
# score exactly equal to Q1 or the median matched no bin and fell through to
# "high". Each bin now includes its lower bound.
countrydf <- countrydf %>%
  mutate(rank = ifelse(sdmean < scoresq[2], "very low",
    ifelse(sdmean < scoresq[3], "low",
      ifelse(sdmean < scoresq[4], "medium", "high")
    )
  ))

# Reshape to one row per country and one column per topic.
# Fix: country_score and sdmean are dropped before spread(). With them
# present every row stayed distinct, so the table had one row per
# country-topic pair (232 rows in the recorded run), each with a single
# non-NA cell.
# Each country now has exactly one row, so the table is sorted by country
# only and the former group_by(country) is no longer needed.
cleantable <- countrydf %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  select(country, china:transport) %>%
  arrange(country)
cleantable
# (Recorded printout removed: it showed the old 232-row table. Re-knit to
# refresh.)
write.csv(cleantable, file = "cleantable.csv")
# Rank the method-2 scores and export them as a country-by-topic table.

# Standardise all scores (z-score over every country-topic pair).
score2mean <- mean(country2df$country_score)
score2sd <- sd(country2df$country_score)
country2df <- country2df %>%
  mutate(sdmean = (country_score - score2mean) / score2sd)

# Quartile cut-points of the standardised scores.
scores2q <- quantile(country2df$sdmean, probs = c(0, 0.25, 0.5, 0.75, 1))

# Label each score by quartile:
#   below Q1 "very low", [Q1, median) "low", [median, Q3) "medium",
#   Q3 and above "high".
# Fix: the previous conditions used strict ">" and "<" on both sides, so a
# score exactly equal to Q1 or the median matched no bin and fell through to
# "high". Each bin now includes its lower bound.
country2df <- country2df %>%
  mutate(rank = ifelse(sdmean < scores2q[2], "very low",
    ifelse(sdmean < scores2q[3], "low",
      ifelse(sdmean < scores2q[4], "medium", "high")
    )
  ))

# Reshape to one row per country and one column per topic.
# Fix: country_score and sdmean are dropped before spread(). With them
# present every row stayed distinct, so the table had one row per
# country-topic pair (215 rows in the recorded run), each with a single
# non-NA cell.
# Each country now has exactly one row, so the table is sorted by country
# only and the former group_by(country) is no longer needed.
cleantable2 <- country2df %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  select(country, development:transport) %>%
  arrange(country)
cleantable2
# (Recorded printout removed: it showed the old 215-row table. Re-knit to
# refresh.)
write.csv(cleantable2, file = "cleantable2.csv")
# Alternative ranking of the method-2 scores using the 20th, 40th and 60th
# percentiles as cut-points (reuses the `sdmean` column already in
# country2df).
# NOTE(review): with these cut-points "high" covers the top 40% of scores.
# Confirm this asymmetry is intended.
scores3q <- quantile(country2df$sdmean, probs = c(0, 0.20, 0.40, 0.60, 1))

# Label each score:
#   below P20 "very low", [P20, P40) "low", [P40, P60) "medium",
#   P60 and above "high".
# Fix: the previous conditions used strict ">" and "<" on both sides, so a
# score exactly equal to P20 or P40 matched no bin and fell through to
# "high". Each bin now includes its lower bound.
country3df <- country2df %>%
  mutate(rank = ifelse(sdmean < scores3q[2], "very low",
    ifelse(sdmean < scores3q[3], "low",
      ifelse(sdmean < scores3q[4], "medium", "high")
    )
  ))

# Reshape to one row per country and one column per topic.
# Fix: country_score and sdmean are dropped before spread(). With them
# present every row stayed distinct, so the table had one row per
# country-topic pair (215 rows in the recorded run), each with a single
# non-NA cell.
# Each country now has exactly one row, so the table is sorted by country
# only and the former group_by(country) is no longer needed.
cleantable3 <- country3df %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  select(country, development:transport) %>%
  arrange(country)
cleantable3
# (Recorded printout removed: it showed the old 215-row table. Re-knit to
# refresh.)
write.csv(cleantable3, file = "cleantable3.csv")
# Compare "member" and "nonmember" countries on selected method-2 topics.
#
# Fix: the non-member label was "nonmember'" (stray apostrophe), which showed
# up verbatim in the plot legends; it is now "nonmember".
# The chain of == comparisons is replaced by %in%. The only behavioural
# difference is that a missing country is now labelled "nonmember" rather
# than NA.
# NOTE(review): this list looks like the Arctic Council member states plus
# Greenland. Confirm that is the intended grouping.
member_countries <- c(
  "Canada", "Denmark", "Greenland", "Finland", "Iceland", "Norway", "Russia",
  "Sweden", "US"
)
membersdf <- country3df
membersdf$member <- ifelse(
  membersdf$country %in% member_countries, "member", "nonmember"
)

# One dot plot per topic: countries ordered by score, coloured by membership.
# Recorded output from the original knit: every ggplotly() call emitted
#   We recommend that you use the dev version of ggplot2 with `ggplotly()`
#   Install it with: `devtools::install_github('hadley/ggplot2')`
multi.indigenous <- ggplot(
  filter(membersdf, name == "indigenous"),
  aes(x = reorder(country, country_score), y = country_score, color = member)
) +
  geom_point() +
  coord_flip() +
  ggtitle("Indigenous")
ggplotly(multi.indigenous)

multi.resources <- ggplot(
  filter(membersdf, name == "resources"),
  aes(x = reorder(country, country_score), y = country_score, color = member)
) +
  geom_point() +
  coord_flip() +
  ggtitle("resources")
ggplotly(multi.resources)

multi.security <- ggplot(
  filter(membersdf, name == "security"),
  aes(x = reorder(country, country_score), y = country_score, color = member)
) +
  geom_point() +
  coord_flip() +
  ggtitle("Security")
ggplotly(multi.security)

multi.diplomacy <- ggplot(
  filter(membersdf, name == "diplomacy"),
  aes(x = reorder(country, country_score), y = country_score, color = member)
) +
  geom_point() +
  coord_flip() +
  ggtitle("diplomacy")
ggplotly(multi.diplomacy)